import pandas as pd
import os
# Show the current working directory of the Python process.
os.getcwd()
'C:\\Users\\belwa\\Documents\\Practice'
# Switch into the project folder so that the relative file names used in this
# notebook ("parkinsons.data", "Summary.xlsx") resolve inside it.
project_dir = "C:\\Users\\belwa\\OneDrive\\Documents\\0Files\\Learning\\Acmegrade\\Jul 23 DS Day 16\\Projects\\Detection of Parkinsons Disease\\"
os.chdir(project_dir)

# Load the dataset file from the project folder into a DataFrame.
df = pd.read_csv("parkinsons.data")

# Display the DataFrame (truncated preview in a notebook).
df
| name | MDVP:Fo(Hz) | MDVP:Fhi(Hz) | MDVP:Flo(Hz) | MDVP:Jitter(%) | MDVP:Jitter(Abs) | MDVP:RAP | MDVP:PPQ | Jitter:DDP | MDVP:Shimmer | ... | Shimmer:DDA | NHR | HNR | status | RPDE | DFA | spread1 | spread2 | D2 | PPE | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | phon_R01_S01_1 | 119.992 | 157.302 | 74.997 | 0.00784 | 0.00007 | 0.00370 | 0.00554 | 0.01109 | 0.04374 | ... | 0.06545 | 0.02211 | 21.033 | 1 | 0.414783 | 0.815285 | -4.813031 | 0.266482 | 2.301442 | 0.284654 |
| 1 | phon_R01_S01_2 | 122.400 | 148.650 | 113.819 | 0.00968 | 0.00008 | 0.00465 | 0.00696 | 0.01394 | 0.06134 | ... | 0.09403 | 0.01929 | 19.085 | 1 | 0.458359 | 0.819521 | -4.075192 | 0.335590 | 2.486855 | 0.368674 |
| 2 | phon_R01_S01_3 | 116.682 | 131.111 | 111.555 | 0.01050 | 0.00009 | 0.00544 | 0.00781 | 0.01633 | 0.05233 | ... | 0.08270 | 0.01309 | 20.651 | 1 | 0.429895 | 0.825288 | -4.443179 | 0.311173 | 2.342259 | 0.332634 |
| 3 | phon_R01_S01_4 | 116.676 | 137.871 | 111.366 | 0.00997 | 0.00009 | 0.00502 | 0.00698 | 0.01505 | 0.05492 | ... | 0.08771 | 0.01353 | 20.644 | 1 | 0.434969 | 0.819235 | -4.117501 | 0.334147 | 2.405554 | 0.368975 |
| 4 | phon_R01_S01_5 | 116.014 | 141.781 | 110.655 | 0.01284 | 0.00011 | 0.00655 | 0.00908 | 0.01966 | 0.06425 | ... | 0.10470 | 0.01767 | 19.649 | 1 | 0.417356 | 0.823484 | -3.747787 | 0.234513 | 2.332180 | 0.410335 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 190 | phon_R01_S50_2 | 174.188 | 230.978 | 94.261 | 0.00459 | 0.00003 | 0.00263 | 0.00259 | 0.00790 | 0.04087 | ... | 0.07008 | 0.02764 | 19.517 | 0 | 0.448439 | 0.657899 | -6.538586 | 0.121952 | 2.657476 | 0.133050 |
| 191 | phon_R01_S50_3 | 209.516 | 253.017 | 89.488 | 0.00564 | 0.00003 | 0.00331 | 0.00292 | 0.00994 | 0.02751 | ... | 0.04812 | 0.01810 | 19.147 | 0 | 0.431674 | 0.683244 | -6.195325 | 0.129303 | 2.784312 | 0.168895 |
| 192 | phon_R01_S50_4 | 174.688 | 240.005 | 74.287 | 0.01360 | 0.00008 | 0.00624 | 0.00564 | 0.01873 | 0.02308 | ... | 0.03804 | 0.10715 | 17.883 | 0 | 0.407567 | 0.655683 | -6.787197 | 0.158453 | 2.679772 | 0.131728 |
| 193 | phon_R01_S50_5 | 198.764 | 396.961 | 74.904 | 0.00740 | 0.00004 | 0.00370 | 0.00390 | 0.01109 | 0.02296 | ... | 0.03794 | 0.07223 | 19.020 | 0 | 0.451221 | 0.643956 | -6.744577 | 0.207454 | 2.138608 | 0.123306 |
| 194 | phon_R01_S50_6 | 214.289 | 260.277 | 77.973 | 0.00567 | 0.00003 | 0.00295 | 0.00317 | 0.00885 | 0.01884 | ... | 0.03078 | 0.04398 | 21.209 | 0 | 0.462803 | 0.664357 | -5.724056 | 0.190667 | 2.555477 | 0.148569 |
195 rows × 24 columns
df.head()  # head() with no argument returns the first 5 rows of the DataFrame
| name | MDVP:Fo(Hz) | MDVP:Fhi(Hz) | MDVP:Flo(Hz) | MDVP:Jitter(%) | MDVP:Jitter(Abs) | MDVP:RAP | MDVP:PPQ | Jitter:DDP | MDVP:Shimmer | ... | Shimmer:DDA | NHR | HNR | status | RPDE | DFA | spread1 | spread2 | D2 | PPE | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | phon_R01_S01_1 | 119.992 | 157.302 | 74.997 | 0.00784 | 0.00007 | 0.00370 | 0.00554 | 0.01109 | 0.04374 | ... | 0.06545 | 0.02211 | 21.033 | 1 | 0.414783 | 0.815285 | -4.813031 | 0.266482 | 2.301442 | 0.284654 |
| 1 | phon_R01_S01_2 | 122.400 | 148.650 | 113.819 | 0.00968 | 0.00008 | 0.00465 | 0.00696 | 0.01394 | 0.06134 | ... | 0.09403 | 0.01929 | 19.085 | 1 | 0.458359 | 0.819521 | -4.075192 | 0.335590 | 2.486855 | 0.368674 |
| 2 | phon_R01_S01_3 | 116.682 | 131.111 | 111.555 | 0.01050 | 0.00009 | 0.00544 | 0.00781 | 0.01633 | 0.05233 | ... | 0.08270 | 0.01309 | 20.651 | 1 | 0.429895 | 0.825288 | -4.443179 | 0.311173 | 2.342259 | 0.332634 |
| 3 | phon_R01_S01_4 | 116.676 | 137.871 | 111.366 | 0.00997 | 0.00009 | 0.00502 | 0.00698 | 0.01505 | 0.05492 | ... | 0.08771 | 0.01353 | 20.644 | 1 | 0.434969 | 0.819235 | -4.117501 | 0.334147 | 2.405554 | 0.368975 |
| 4 | phon_R01_S01_5 | 116.014 | 141.781 | 110.655 | 0.01284 | 0.00011 | 0.00655 | 0.00908 | 0.01966 | 0.06425 | ... | 0.10470 | 0.01767 | 19.649 | 1 | 0.417356 | 0.823484 | -3.747787 | 0.234513 | 2.332180 | 0.410335 |
5 rows × 24 columns
df.tail()  # tail() with no argument returns the last 5 rows of the DataFrame
| name | MDVP:Fo(Hz) | MDVP:Fhi(Hz) | MDVP:Flo(Hz) | MDVP:Jitter(%) | MDVP:Jitter(Abs) | MDVP:RAP | MDVP:PPQ | Jitter:DDP | MDVP:Shimmer | ... | Shimmer:DDA | NHR | HNR | status | RPDE | DFA | spread1 | spread2 | D2 | PPE | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 190 | phon_R01_S50_2 | 174.188 | 230.978 | 94.261 | 0.00459 | 0.00003 | 0.00263 | 0.00259 | 0.00790 | 0.04087 | ... | 0.07008 | 0.02764 | 19.517 | 0 | 0.448439 | 0.657899 | -6.538586 | 0.121952 | 2.657476 | 0.133050 |
| 191 | phon_R01_S50_3 | 209.516 | 253.017 | 89.488 | 0.00564 | 0.00003 | 0.00331 | 0.00292 | 0.00994 | 0.02751 | ... | 0.04812 | 0.01810 | 19.147 | 0 | 0.431674 | 0.683244 | -6.195325 | 0.129303 | 2.784312 | 0.168895 |
| 192 | phon_R01_S50_4 | 174.688 | 240.005 | 74.287 | 0.01360 | 0.00008 | 0.00624 | 0.00564 | 0.01873 | 0.02308 | ... | 0.03804 | 0.10715 | 17.883 | 0 | 0.407567 | 0.655683 | -6.787197 | 0.158453 | 2.679772 | 0.131728 |
| 193 | phon_R01_S50_5 | 198.764 | 396.961 | 74.904 | 0.00740 | 0.00004 | 0.00370 | 0.00390 | 0.01109 | 0.02296 | ... | 0.03794 | 0.07223 | 19.020 | 0 | 0.451221 | 0.643956 | -6.744577 | 0.207454 | 2.138608 | 0.123306 |
| 194 | phon_R01_S50_6 | 214.289 | 260.277 | 77.973 | 0.00567 | 0.00003 | 0.00295 | 0.00317 | 0.00885 | 0.01884 | ... | 0.03078 | 0.04398 | 21.209 | 0 | 0.462803 | 0.664357 | -5.724056 | 0.190667 | 2.555477 | 0.148569 |
5 rows × 24 columns
# Print a summary of the DataFrame: index range, column names, non-null
# counts, dtypes and memory usage.
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 195 entries, 0 to 194 Data columns (total 24 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 name 195 non-null object 1 MDVP:Fo(Hz) 195 non-null float64 2 MDVP:Fhi(Hz) 195 non-null float64 3 MDVP:Flo(Hz) 195 non-null float64 4 MDVP:Jitter(%) 195 non-null float64 5 MDVP:Jitter(Abs) 195 non-null float64 6 MDVP:RAP 195 non-null float64 7 MDVP:PPQ 195 non-null float64 8 Jitter:DDP 195 non-null float64 9 MDVP:Shimmer 195 non-null float64 10 MDVP:Shimmer(dB) 195 non-null float64 11 Shimmer:APQ3 195 non-null float64 12 Shimmer:APQ5 195 non-null float64 13 MDVP:APQ 195 non-null float64 14 Shimmer:DDA 195 non-null float64 15 NHR 195 non-null float64 16 HNR 195 non-null float64 17 status 195 non-null int64 18 RPDE 195 non-null float64 19 DFA 195 non-null float64 20 spread1 195 non-null float64 21 spread2 195 non-null float64 22 D2 195 non-null float64 23 PPE 195 non-null float64 dtypes: float64(22), int64(1), object(1) memory usage: 36.7+ KB
Parkinson's disease is a neurodegenerative disorder that primarily affects movement. It occurs when nerve cells (neurons) in the brain that produce dopamine, a chemical messenger responsible for smooth and coordinated muscle movements, become impaired or die. This leads to symptoms such as tremors, stiffness, slowness of movement, and difficulties with balance and coordination. Parkinson's disease is chronic and progressive, meaning it worsens over time, but treatments are available to help manage its symptoms.
NHR (Noise-to-Harmonics Ratio): This is a measure of the ratio of noise to tonal components in the voice. It indicates the amount of noise present in the voice signal relative to the harmonic (tonal) components.
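HNR (Harmonics-to-Noise Ratio): This is conceptually the inverse of NHR: it represents the ratio of harmonic (tonal) components to noise in the voice. Higher values indicate a clearer, more harmonic-rich voice signal. Note that in this dataset the two columns are not numerical reciprocals of each other (for example, the first row has NHR 0.02211 and HNR 21.033).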
RPDE (Recurrence Period Density Entropy): This is a measure of the nonlinear dynamical complexity of the voice signal. It quantifies the rate of recurrence of patterns in the signal, reflecting its predictability and complexity.
D2 (Correlation dimension): This is another measure of the nonlinear dynamical complexity of the voice signal. It quantifies the number of independent degrees of freedom in the signal, providing insights into its underlying dynamics.
DFA (Detrended Fluctuation Analysis): This is a method used to analyze the fractal properties of the voice signal. DFA calculates the fractal scaling exponent, which describes how the fluctuation of the signal changes with the length of the observation window. It provides information about the long-range correlation properties of the signal.
# Dimensions of the DataFrame as a (rows, columns) tuple.
df.shape
(195, 24)
# Number of rows in the DataFrame.
len(df)
195
# Data type of each column.
df.dtypes
name object MDVP:Fo(Hz) float64 MDVP:Fhi(Hz) float64 MDVP:Flo(Hz) float64 MDVP:Jitter(%) float64 MDVP:Jitter(Abs) float64 MDVP:RAP float64 MDVP:PPQ float64 Jitter:DDP float64 MDVP:Shimmer float64 MDVP:Shimmer(dB) float64 Shimmer:APQ3 float64 Shimmer:APQ5 float64 MDVP:APQ float64 Shimmer:DDA float64 NHR float64 HNR float64 status int64 RPDE float64 DFA float64 spread1 float64 spread2 float64 D2 float64 PPE float64 dtype: object
# Build the descriptive-statistics table for df and save it as an Excel
# workbook in the current working directory.
summary_stats = df.describe()
summary_stats.to_excel("Summary.xlsx")
# status: health status of the subject (1 = Parkinson's, 0 = healthy).
import matplotlib.pyplot as plt

# Histogram of the target column, showing how many rows fall in each class.
# Bracket access is used so the column lookup cannot collide with a
# DataFrame attribute or method name.
df["status"].hist()
plt.xlabel("status")
# NOTE(review): the `name` values (e.g. phon_R01_S01_1 .. phon_R01_S01_5)
# suggest several rows per subject, so this axis presumably counts recordings
# rather than distinct patients -- confirm before relabeling.
plt.ylabel("No. of Patients")
plt.show()
import pandas as pd
from ydata_profiling import ProfileReport
# Build the ydata-profiling report for df and leave it as the final
# expression so a notebook renders it.
profile_report = ProfileReport(df)
profile_report
Summarize dataset: 0%| | 0/5 [00:00<?, ?it/s]
Generate report structure: 0%| | 0/1 [00:00<?, ?it/s]
Render HTML: 0%| | 0/1 [00:00<?, ?it/s]